from bs4 import BeautifulSoup
import requests
import csv
from time import sleep

# Request headers sent with every page fetch; the User-Agent mimics a desktop
# Chrome browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/91.0.4472.124 Safari/537.36'
    ),
}

def _split_pair(text):
    """Split a ``'first/second'`` cell into two whitespace-stripped strings.

    Only the first two ``/``-separated fields are used. The second element
    is ``''`` when *text* contains no ``/``.
    """
    parts = text.split('/')
    first = parts[0].strip()
    second = parts[1].strip() if len(parts) > 1 else ''
    return first, second


def _parse_weather_row(cols):
    """Turn the ``<td>`` cells of one table row into a 7-field record."""
    date = cols[0].get_text(strip=True)
    weather_day, weather_night = _split_pair(cols[1].get_text(strip=True))
    # Temperatures keep their string form; only the unit sign is removed.
    temp_high, temp_low = (
        part.replace('℃', '').strip()
        for part in _split_pair(cols[2].get_text(strip=True))
    )
    wind_day, wind_night = _split_pair(cols[3].get_text(strip=True))
    return [date, weather_day, weather_night, temp_high, temp_low, wind_day, wind_night]


def scrape_month_data(year, month, *, timeout=10):
    """Scrape one month of the Dalian history page on tianqihoubao.com.

    Args:
        year: Year used to build the page URL.
        month: Month number; zero-padded to two digits in the URL.
        timeout: Seconds to wait for the server before the request is
            abandoned. Without it a stalled connection would block forever.

    Returns:
        A list of rows of the form ``[date, weather_day, weather_night,
        temp_high, temp_low, wind_day, wind_night]``, all strings. An empty
        list is returned when the table is missing or when any error occurs
        while fetching or parsing; the error is printed, not raised.
    """
    url = f'https://www.tianqihoubao.com/lishi/dalian/month/{year}{month:02d}.html'

    # The broad catch is deliberate: one failing month must not abort the
    # caller's loop over all months.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        # Treat 4xx/5xx answers as errors instead of parsing an error page.
        response.raise_for_status()
        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, 'lxml')

        # Locate the table that holds the weather data.
        table = soup.find('table', {'class': 'weather-table'})
        if table is None:
            print(f"未找到表格数据: {year}年{month}月")
            return []

        monthly_data = [
            _parse_weather_row(cols)
            # The first <tr> is the header row.
            for cols in (row.find_all('td') for row in table.find_all('tr')[1:])
            # Rows with fewer than four cells are incomplete and skipped.
            if len(cols) >= 4
        ]

        print(f"成功爬取 {year}年{month}月 数据，共{len(monthly_data)}条记录")
        return monthly_data

    except Exception as e:
        print(f"爬取 {year}年{month}月 数据时出错: {e}")
        return []


def main():
    """Scrape every month of 2022-2024 and store the rows in one CSV file.

    The output file is opened once and a header row is written first. Each
    month's rows are then appended as soon as they are fetched; months that
    yield no rows are skipped. A 0.5 second pause follows every fetch.
    """
    column_names = ['日期', '白天天气', '夜间天气', '最高气温(℃)', '最低气温(℃)', '白天风力', '夜间风力']
    # All (year, month) pairs in chronological order.
    periods = [(y, m) for y in range(2022, 2025) for m in range(1, 13)]

    with open('dalian_weather_2022-2024.csv', 'w', newline='', encoding='utf-8') as out_file:
        csv_writer = csv.writer(out_file)
        csv_writer.writerow(column_names)

        for y, m in periods:
            rows = scrape_month_data(y, m)
            if rows:
                csv_writer.writerows(rows)
            # Short pause between consecutive requests.
            sleep(0.5)

    print("所有数据已成功保存到 dalian_weather_2022-2024.csv")


# Start the scrape only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()